/*

	File-Name: 0-data.do
	Last Updated: 12/3/2020
	Author: Janica Magat
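	Purpose: Build the merged dataset (blocking IDs, baseline, endline, phone survey, intervention dates, intervention consent) and save it as Data/lagostax_merged_final.dta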

*/

* ---------------------------------------------------------------------------
* Session setup
* ---------------------------------------------------------------------------

* Ask for Stata 15 behaviour; -capture- lets the script continue if the
* -version- call is rejected by the running Stata.
capture version 15

* Start from an empty session. -clear all- already covers what a plain
* -clear- and -clear results- would do, so a single call is sufficient.
clear all

* Never pause output; -permanently- also stores the setting for later sessions.
set more off, permanently

* set working directory
* -capture- ignores a failed -cd-; in that case the relative "Data/..." paths
* used below resolve against whatever the current working directory already is.
capture cd "~/Dropbox/Lagos EGAP Metaketa Co-PI folder/05 Analyses/Final Data Analysis/GottliebLebasMagat_LagosTax_Replication - BJPS Dataverse" 

* ---------------------------------------------------------------------------
* Master data: blocking IDs
* This file is the master dataset that every later merge attaches to.
* ---------------------------------------------------------------------------
use "Data/lagostax_blockingvars.dta", clear

// treatment assignment at each intervention
label variable int1_treatment "Treatment assignment at intervention1"
label variable int2_treatment "Treatment assignment at intervention2"

// randomization block identifiers
label variable block_id_int1       "Randomization1 (assignment to C, T1) block id"
label variable block_id_int2_step1 "Randomization2 (assignment to G1/T1, G2/Collective, G3/Non-Collective) block id"
label variable block_id_int2_step2 "Randomization2 (assignment to T2, T3, T4, T5) block id"
label variable block_int2_group    "Randomization2 grouping"

* add de-identified raw baseline data
* The baseline file is prepared inside preserve/restore so the master data in
* memory is left untouched, then saved to a tempfile for merging.
preserve
use "Data/lagostax_baseline.dta", clear

// add baseline identifiers: suffix every variable from -date- through
// -f17_other- (in dataset order) with _B
foreach var of varlist date-f17_other {
	rename `var' `var'_B
}

// flag rows that come from the baseline file
generate sample_b = 1

tempfile temp1
save `temp1'
restore

// SbjNum must identify a single row on each side: the later
// -merge 1:1 SbjNum- with the phone survey only runs if it does.
// -merge 1:1- states that contract and stops with an error on duplicate IDs,
// whereas -merge m:m- would silently pair duplicates by row order.
merge 1:1 SbjNum using `temp1'
drop _merge

// check duplicates
duplicates report SbjNum 

// proportion of yoruba
* baseline

* Swap the value-labelled market variable for a lower-case string that keeps
* the same name (mname_B) and ends up as the last variable in the dataset.
decode mname_B, generate(mkt_label)
drop mname_B
generate mname_B = lower(mkt_label)
drop mkt_label

* mark one observation per market
egen mtag_b = tag(mname_B)

* market-level mean of the yoruba variable
bysort mname_B: egen pyoruba = mean(yoruba)
label variable pyoruba "Density of Yoruba in market, baseline"

* Three-way grouping of the density: below .33, .33 up to (not including) .66,
* and .66 through 1. Anything else, including missing, stays missing.
generate qyoruba = cond(pyoruba < .33, 1, ///
                   cond(pyoruba < .66, 2, ///
                   cond(pyoruba <= 1,  3, .)))
label define qyoruba 1 "0 - 33%" 2 "33 - 66%" 3 "66 - 100%", modify
label values qyoruba qyoruba

* add endline data
* The endline file is prepared inside preserve/restore so the data in memory
* is left untouched, then saved to a tempfile for merging.
preserve
use "Data/lagostax_endline.dta", clear

// add endline identifiers: suffix every variable from -deviceid- through
// -cati_tax- (in dataset order) with _E
foreach var of varlist deviceid-cati_tax {
	rename `var' `var'_E
}

// the (now suffixed) interviewee code is the merge key
rename e1_interviewee_code_E SbjNum

tempfile temp2
save `temp2'
restore

// SbjNum must identify a single row on each side: the later
// -merge 1:1 SbjNum- with the phone survey only runs if it does.
// -merge 1:1- states that contract and stops with an error on duplicate IDs,
// whereas -merge m:m- would silently pair duplicates by row order.
merge 1:1 SbjNum using `temp2' // 1433 matched
drop _merge

// proportion of yoruba
* endline

* Yoruba indicator: 1 when resp_ethnicity_E equals 1, 0 for any other observed
* value. !missing() is used rather than "!= ." because extended missing codes
* (.a-.z) are not equal to "." and would otherwise be coded as Non-Yoruba.
generate yoruba_e = (resp_ethnicity_E == 1) if !missing(resp_ethnicity_E)
label var yoruba_e "Ethnicity, endline"
label define yoruba_e 0 "Non-Yoruba" 1 "Yoruba"
label values yoruba_e yoruba_e

* Replace the endline market name with a lower-case copy named market_name_e.
gen mname_e = lower(market_name_E)
drop market_name_E
rename mname_e market_name_e

* mark one observation per market, then compute the market-level mean of the
* Yoruba indicator
egen mtag_e = tag(market_name_e)
bysort market_name_e: egen pyoruba_e = mean(yoruba_e)
label var pyoruba_e "Density of Yoruba in market, endline"

* ---------------------------------------------------------------------------
* Phone survey data
* ---------------------------------------------------------------------------
preserve
use "Data/lagostax_phone.dta", clear

// The interviewee code becomes the merge key. It is moved in front of
// -deviceid- so that it falls outside the range renamed below.
rename e1_interviewee SbjNum
order SbjNum, before(deviceid)

// suffix every variable from -deviceid- through -duplicate- with _P
foreach v of varlist deviceid-duplicate {
	rename `v' `v'_P
}

tempfile phone_tmp
save `phone_tmp'
restore

// one row per SbjNum on both sides; no _merge variable is kept
merge 1:1 SbjNum using `phone_tmp', nogenerate // 1157 matched

// check duplicates
duplicates report SbjNum

// change the variable names into lower case
rename *, lower

// notes
* notes1: defined for baseline vendors only (sample_b == 1).
*   1 = sample_e equals 1, 0 = sample_e equals ".", otherwise missing.
generate notes1 = cond(sample_e == 1, 1, cond(sample_e == ., 0, .)) if sample_b == 1

label define notes1 0 "Vendor in baseline but not in endline sample" 1 "Vendor in baseline and endline sample"
label values notes1 notes1

* notes2: defined for endline-sample vendors only (sample_e == 1), from consent_e.
*   1 = consent_e equals ".", 2 = consent_e is 0 or 2, 3 = consent_e is 1,
*   otherwise missing.
generate notes2 = cond(consent_e == ., 1,             ///
                  cond(consent_e == 1, 3,             ///
                  cond(inlist(consent_e, 0, 2), 2, .))) if sample_e == 1

label define notes2 1 "Vendor not found in endline" 2 "Vendor found but not interviewed in endline" 3 "Vendor found and interviewed in endline"
label values notes2 notes2

* add dates of intervention
* The intervention-date file is merged as stored on disk, so no
* preserve/tempfile round trip is needed.
*
* -merge 1:1- replaces -merge m:m-: sbjnum is already unique in the data in
* memory (the earlier 1:1 phone merge requires it), and with 1:1 a duplicated
* sbjnum in the date file stops the script instead of silently duplicating rows.
*
* keep(master match) discards rows found only in the date file, and
* nogenerate leaves no _merge variable behind.
merge 1:1 sbjnum using "Data/lagostax_interventiondate.dta", keep(master match) nogenerate

* add intervention consent 
* The Excel sheet is imported inside preserve/restore so the data in memory is
* left untouched, then saved to a tempfile for merging.
preserve
import excel "Data/endline-sample-aug2020.xlsx", clear firstrow

// For rows whose sample is "placebo", the consent_1 value is moved into its
// own variable and consent_1 is blanked.
gen consent_placebo = consent_1 if sample == "placebo"
replace consent_1 = "" if sample == "placebo"

keep SbjNum consent_1 consent_2 consent_placebo
rename SbjNum sbjnum
rename consent_1 consent_int1
rename consent_2 consent_int2

label var consent_int1 "Intervention1 consent"
label var consent_int2 "Intervention2 consent"
label var consent_placebo "Placebo consent"

// Rows without an ID cannot be matched to a vendor (e.g. empty spreadsheet
// rows); drop them so they cannot count as duplicate keys in the 1:1 merge.
drop if missing(sbjnum)

tempfile temp4
save `temp4'
restore

// -merge 1:1- replaces -merge m:m-: one consent record per vendor is expected,
// and a duplicated sbjnum on either side now stops the script instead of
// being paired by row order.
merge 1:1 sbjnum using `temp4' // 1493 matched
drop _merge 

* Restrict to vendors in the endline sample (did not attrit from intervention):
* every row whose notes1 is anything other than 1, including missing, is removed.
drop if notes1 != 1

* Write the merged dataset, overwriting any existing copy.
save "Data/lagostax_merged_final.dta", replace
